package taobao

import com.o2o.utils.Iargs
import org.apache.spark.sql.SparkSession

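/**
  * Spark job that rewrites the city, district and regional_ID columns of the
  * intime address table according to a hard-coded set of remapping rules.
  *
  * Author:  o2o-rd-0008
  * Created: 2021/2/22 14:36
  */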
object AddressCheck {

  // Source JSON data set, read through the s3a connector.
  private val InputPath =
    "s3a://o2o-dimension-table/address_table/address_table_2021/2/address_platform/intime_address_2021_2/"
  // Alternative input kept for reference:
  //   s3a://o2o-dataproces-group/zsc/standardData/shopAddress_company/shop_20210202/

  // Destination of the corrected JSON data set.
  private val OutputPath =
    "s3a://o2o-dimension-table/address_table/address_table_2021/2/address_platform/intime_address_2021_2_new/"
  // Alternative output kept for reference:
  //   s3a://o2o-dataproces-group/zsc/standardData/shopAddress_company/shop_20210222/

  // Name of the temporary view the SQL below selects from.
  private val ViewName = "tab"

  // Computes three replacement columns next to the original ones:
  //   city_new      - '省直辖县级行政区划' for the listed province/district pairs, else `city`
  //   district_new  - replacement district name for the listed triples, else `district`
  //   region_id_new - replacement code for the same rows, else `regional_ID`
  // The first fifteen district rules are the (province, city, district) triples
  // that the original author listed as needing a fix.
  //
  // The 邢台县 rule picks its target from keywords found in `address`. `rlike`
  // is a regular-expression match, so the keywords must NOT be wrapped in '%':
  // that is LIKE syntax and would be matched as a literal percent sign. Both
  // the district rule and the region-code rule therefore use the bare keywords,
  // which keeps district_new and region_id_new consistent with each other.
  //
  // NOTE(review): the 海门区 -> 海门市 row goes in the opposite direction to
  // e.g. 蓬莱市 -> 蓬莱区 — confirm the direction and its code are intended.
  private val RemapSql: String =
    """
      |select
      |*,
      |case
      |when province='海南省' and district='文昌市' then '省直辖县级行政区划'
      |when province='吉林省' and district='公主岭市' then '省直辖县级行政区划'
      |when province='河南省' and district='济源市' then '省直辖县级行政区划'
      |when province='湖北省' and district='仙桃市' then '省直辖县级行政区划'
      |when province='湖北省' and district='神农架林区' then '省直辖县级行政区划'
      |when province='湖北省' and district='天门市' then '省直辖县级行政区划'
      |when province='湖北省' and district='潜江市' then '省直辖县级行政区划'
      |else city
      |end city_new
      |,
      |case
      |when province='河北省' and city='邢台市' and district='桥东区' then '襄都区'
      |when province='河北省' and city='邢台市' and district='任县' then '任泽区'
      |when province='安徽省' and city='芜湖市' and district='无为县' then '无为市'
      |when province='山西省' and city='晋中市' and district='太谷县' then '太谷区'
      |when province='山东省' and city='聊城市' and district='茌平县' then '茌平区'
      |when province='江西省' and city='赣州市' and district='龙南县' then '龙南市'
      |when province='河北省' and city='邢台市' and district='桥西区' then '信都区'
      |when province='四川省' and city='成都市' and district='新津县' then '新津区'
      |when province='广西壮族自治区' and city='百色市' and district='平果县' then '平果市'
      |when province='江西省' and city='南昌市' and district='湾里区' then '新建区'
      |when province='安徽省' and city='芜湖市' and district='芜湖县' then '湾沚区'
      |when province='河南省' and city='新乡市' and district='长垣县' then '长垣市'
      |when province='河北省' and city='邢台市' and district='邢台县'  then (case when address rlike '豫让桥街道' or address rlike '晏家屯镇' or address rlike '祝村镇' or address rlike '东汪镇' then '襄都区' else '信都区' end)
      |when province='河北省' and city='邢台市' and district='南和县' then '南和区'
      |when province='黑龙江省' and city='黑河市' and district='嫩江县' then '嫩江市'
      |when province='陕西省' and city='延安市' and district='子长县' then '子长市'
      |when province='青海省' and city='西宁市' and district='湟中县' then '湟中区'
      |when province='青海省' and city='黄南藏族自治州' and district='同仁县' then '同仁市'
      |when province='新疆维吾尔自治区' and city='阿克苏地区' and district='库车县' then '库车市'
      |when province='安徽省' and city='芜湖市' and district='繁昌县' then '繁昌区'
      |when province='安徽省' and city='芜湖市' and district='三山区' then '弋江区'
      |when province='四川省' and city='遂宁市' and district='射洪县' then '射洪市'
      |when province='黑龙江省' and city='伊春市' and district='汤旺河区' then '汤旺县'
      |when province='河南省' and city='周口市' and district='淮阳县' then '淮阳区'
      |when province='江西省' and city='上饶市' and district='上饶县' then '广信区'
      |when province='湖南省' and city='邵阳市' and district='邵东县' then '邵东市'
      |when province='安徽省' and city='宣城市' and district='广德县' then '广德市'
      |when province='黑龙江省' and city='伊春市' and district='伊春区' then '伊美区'
      |when province='云南省' and city='玉溪市' and district='澄江县' then '澄江市'
      |when province='山东省' and city='烟台市' and district='蓬莱市' then '蓬莱区'
      |when province='江苏省' and city='南通市' and district='海门区' then '海门市'
      |when province='湖北省' and city='荆州市' and district='监利县' then '监利市'
      |else district
      |end district_new,
      |case
      |when province='河北省' and city='邢台市' and district='桥东区' then '130502'
      |when province='河北省' and city='邢台市' and district='任县' then '130505'
      |when province='安徽省' and city='芜湖市' and district='无为县' then '340281'
      |when province='山西省' and city='晋中市' and district='太谷县' then '140703'
      |when province='山东省' and city='聊城市' and district='茌平县' then '371503'
      |when province='江西省' and city='赣州市' and district='龙南县' then '360783'
      |when province='河北省' and city='邢台市' and district='桥西区' then '130503'
      |when province='四川省' and city='成都市' and district='新津县' then '510118'
      |when province='广西壮族自治区' and city='百色市' and district='平果县' then '451082'
      |when province='江西省' and city='南昌市' and district='湾里区' then '360112'
      |when province='安徽省' and city='芜湖市' and district='芜湖县' then '340210'
      |when province='河南省' and city='新乡市' and district='长垣县' then '410783'
      |when province='河北省' and city='邢台市' and district='邢台县' then (case when address rlike '豫让桥街道' or address rlike '晏家屯镇' or address rlike '祝村镇' or address rlike '东汪镇' then '130502' else '130503' end)
      |when province='河北省' and city='邢台市' and district='南和县' then '130506'
      |when province='黑龙江省' and city='黑河市' and district='嫩江县' then '231183'
      |when province='陕西省' and city='延安市' and district='子长县' then '610681'
      |when province='青海省' and city='西宁市' and district='湟中县' then '630122'
      |when province='青海省' and city='黄南藏族自治州' and district='同仁县' then '632321'
      |when province='新疆维吾尔自治区' and city='阿克苏地区' and district='库车县' then '652902'
      |when province='安徽省' and city='芜湖市' and district='繁昌县' then '340211'
      |when province='安徽省' and city='芜湖市' and district='三山区' then '340203'
      |when province='四川省' and city='遂宁市' and district='射洪县' then '510981'
      |when province='黑龙江省' and city='伊春市' and district='汤旺河区' then '230723'
      |when province='河南省' and city='周口市' and district='淮阳县' then '411603'
      |when province='江西省' and city='上饶市' and district='上饶县' then '361104'
      |when province='湖南省' and city='邵阳市' and district='邵东县' then '430582'
      |when province='安徽省' and city='宣城市' and district='广德县' then '341882'
      |when province='黑龙江省' and city='伊春市' and district='伊春区' then '230717'
      |when province='云南省' and city='玉溪市' and district='澄江县' then '530481'
      |when province='山东省' and city='烟台市' and district='蓬莱市' then '370614'
      |when province='江苏省' and city='南通市' and district='海门区' then '320684'
      |when province='湖北省' and city='荆州市' and district='监利县' then '421023'
      |when province='海南省' and district='文昌市' then '469005'
      |when province='吉林省' and district='公主岭市' then '220184'
      |when province='河南省' and district='济源市' then '419001'
      |when province='湖北省' and district='仙桃市' then '429004'
      |when province='湖北省' and district='神农架林区' then '429021'
      |when province='湖北省' and district='天门市' then '429006'
      |when province='湖北省' and district='潜江市' then '429005'
      |else regional_ID
      |end region_id_new
      |from
      |tab
    """.stripMargin

  /**
    * Reads the JSON address data set at `InputPath`, replaces its `city`,
    * `district` and `regional_ID` columns with the values computed by
    * `RemapSql`, and writes the result as JSON to `OutputPath`.
    *
    * The s3a access key, secret key and endpoint are taken from
    * [[com.o2o.utils.Iargs]]. The Spark session is stopped when the job ends,
    * whether it succeeds or fails.
    *
    * @param args command-line arguments; not used
    */
  def main(args: Array[String]): Unit = {
    // NOTE(review): the master is hard-coded to local[*]; presumably this job
    // is meant to be launched locally — confirm before submitting to a cluster.
    val spark = SparkSession.builder()
      .appName(this.getClass.getSimpleName)
      .master("local[*]")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.debug.maxToStringFields", "50000")
      .getOrCreate()

    try {
      val sc = spark.sparkContext
      sc.hadoopConfiguration.set("fs.s3a.access.key", Iargs.OBSACCESS)
      sc.hadoopConfiguration.set("fs.s3a.secret.key", Iargs.OBSSECRET)
      sc.hadoopConfiguration.set("fs.s3a.endpoint", Iargs.OBSENDPOINT)
      sc.setLogLevel("WARN")

      // createOrReplaceTempView is the non-deprecated equivalent of registerTempTable.
      spark.read.json(InputPath).createOrReplaceTempView(ViewName)

      // Swap each original column for its recomputed "*_new" counterpart.
      val resdf = spark.sql(RemapSql)
        .drop("city", "district", "regional_ID")
        .withColumnRenamed("city_new", "city")
        .withColumnRenamed("district_new", "district")
        .withColumnRenamed("region_id_new", "regional_ID")

      resdf.write.json(OutputPath)
    } finally {
      // Release the session's resources even if the read, query or write fails.
      spark.stop()
    }
  }
}
